Chapter 4 Data statistics
4.1 Sequencing reads statistics
# Summarise the post-filtering sequencing effort across all samples:
# the total, plus mean ± sd per sample, each rounded to two decimals.
# Read counts are converted with * 150 / 1e9 -- presumably 150 bp reads
# expressed in gigabases; TODO confirm the read length.
sample_preprocessing %>%
  mutate(data_amount = reads_post_filt * 150 / 1000000000) %>%
  summarise(
    Total = round(sum(data_amount), 2),
    mean = round(mean(data_amount), 2),
    sd = round(sd(data_amount), 2)
  ) %>%
  # Collapse mean and sd into a single "mean ± sd" display column
  unite("Average", mean, sd, sep = " ± ", remove = TRUE) %>%
  tt()

| Total | Average |
|---|---|
| 712.18 | 7.83 ± 8.08 |
4.2 DNA fractions
# Build a per-sample table splitting the sequenced bases into four fractions:
#   lowqual_bases  - bases removed by filtering (pre-filter minus post-filter)
#   host_bases     - taken as-is from sample_preprocessing
#   unmapped_bases - metagenomic bases not accounted for by MAG-mapped bases
#   mags_bases     - per-sample sum of read_counts multiplied by 150
#                    (presumably the read length in bp -- TODO confirm)
sequence_fractions <- read_counts %>%
  # One row per genome/sample pair, then total the counts per sample
  pivot_longer(cols = -genome, names_to = "sample", values_to = "value") %>%
  group_by(sample) %>%
  summarise(mags = sum(value)) %>%
  left_join(sample_preprocessing, by = join_by(sample)) %>%
  select(
    sample, mags,
    bases_pre_filt, bases_post_filt,
    host_bases, metagenomic_bases
  ) %>%
  mutate(
    mags_bases = mags * 150,
    lowqual_bases = bases_pre_filt - bases_post_filt,
    unmapped_bases = metagenomic_bases - mags_bases,
    # The estimated MAG bases can exceed the metagenomic bases; clamp to zero
    unmapped_bases = ifelse(unmapped_bases < 0, 0, unmapped_bases)
  ) %>%
  select(sample, lowqual_bases, host_bases, unmapped_bases, mags_bases)
# Render the per-sample sequence fractions as a table, with every numeric
# column divided by 1e9 for display.
sequence_fractions %>%
  # across() replaces the superseded mutate_at(vars(...)) scoped verb
  mutate(across(-sample, ~ .x / 1000000000)) %>%
  # Columns are renamed by position, so this relies on the column order of
  # sequence_fractions: sample, lowqual, host, unmapped, mags.
  rename(
    "Sample" = 1,
    "Low quality" = 2,
    "Mapped to host" = 3,
    "Unmapped" = 4,
    "Mapped to MAGs" = 5
  ) %>%
  tt()

| Sample | Low quality | Mapped to host | Unmapped | Mapped to MAGs |
|---|---|---|---|---|
| E01 | 1.9123757 | 1.7193909 | 0.50379365 | 1.83467325 |
| E02 | 0.4353280 | 4.2158016 | 0.99338589 | 0.02671065 |
| E03 | 1.3042313 | 1.7994078 | 6.42222236 | 0.01078875 |
| E16 | 0.4224530 | 0.1025580 | 0.20197846 | 4.93816200 |
| E18 | 4.7850204 | 6.9637290 | 24.05986617 | 0.52864665 |
| E21 | 1.0156349 | 2.6368389 | 0.90981533 | 0.10217775 |
| E24 | 3.7280695 | 1.1110971 | 23.81060939 | 2.34316305 |
| E25 | 1.5376473 | 7.9804608 | 6.00871944 | 0.85221990 |
| E27 | 0.4230030 | 0.0911577 | 1.95160083 | 0.44069160 |
| E28 | 1.6079642 | 1.0844487 | 4.47911541 | 0.19568490 |
| E30 | 1.7928992 | 1.4413299 | 4.00947792 | 5.12918235 |
| E31 | 0.6816818 | 0.0320553 | 0.50570530 | 2.84763450 |
| E34 | 2.1948529 | 0.8249310 | 11.67502744 | 2.54218275 |
| E38 | 0.9838001 | 4.3969737 | 1.28190071 | 0.01802595 |
| E43 | 0.4955380 | 2.8185273 | 0.82331661 | 0.00793485 |
| E44 | 0.4234418 | 5.9270472 | 0.44183940 | 0.24959295 |
| E45 | 1.5173952 | 0.8510214 | 1.95118882 | 0.84886650 |
| E48 | 2.3197861 | 1.9469865 | 7.44564809 | 5.90044110 |
| E56 | 0.5108719 | 0.0847401 | 1.09609707 | 1.82499015 |
| E58 | 0.4880990 | 2.1264312 | 1.36506017 | 0.11759265 |
| H04 | 2.5008506 | 27.4835352 | 13.14400663 | 0.03507165 |
| H06 | 0.2841785 | 2.0398764 | 1.46660757 | 0.01427955 |
| H07 | 1.4500404 | 10.9416210 | 3.39146197 | 0.01941135 |
| H08 | 0.5011987 | 0.2131668 | 0.57356153 | 0.45570180 |
| H09 | 1.2653958 | 10.8166410 | 7.00507483 | 0.15877860 |
| H10 | 0.7942211 | 1.3782558 | 4.03545973 | 0.11882340 |
| H12 | 2.5813911 | 8.4009948 | 4.51254046 | 0.84020835 |
| H15 | 0.1917405 | 1.5658842 | 0.74185414 | 0.00826125 |
| H16 | 2.1486462 | 4.0113864 | 9.56424671 | 3.69521370 |
| H19 | 0.5557962 | 4.2411288 | 2.50314959 | 0.02286150 |
| H20 | 0.9677842 | 4.3890999 | 3.78083553 | 0.07778835 |
| H23 | 0.9420728 | 3.9595926 | 1.30102424 | 0.29551170 |
| H25 | 1.0738812 | 7.7087100 | 3.42255644 | 0.68422815 |
| H30 | 0.4902154 | 1.3423326 | 0.75352358 | 2.29694205 |
| H31 | 1.8416387 | 9.7206468 | 4.43631049 | 0.48033270 |
| H32 | 0.5427161 | 5.8415880 | 1.38975295 | 0.06290070 |
| H34 | 1.3188004 | 12.4245048 | 5.12752821 | 0.02616405 |
| H37 | 2.5737627 | 27.8754720 | 9.15837446 | 0.25084785 |
| H39 | 0.2938515 | 2.4124689 | 0.75581958 | 0.02116935 |
| H40 | 1.1315544 | 3.8592006 | 5.83728129 | 0.42791340 |
| H43 | 0.7161526 | 6.3701718 | 2.50320009 | 0.03324300 |
| H45 | 0.9215094 | 1.5407343 | 3.94772969 | 0.81685470 |
| H47 | 0.9581476 | 5.6254728 | 2.15816743 | 0.32372385 |
| P01 | 0.5733407 | 1.8086574 | 1.33450269 | 0.76149270 |
| P05 | 1.1668645 | 9.3271338 | 0.63288528 | 0.36958890 |
| P09 | 0.7574713 | 7.1642826 | 0.52521364 | 0.04058895 |
| P14 | 0.5034011 | 3.8678286 | 0.03866504 | 0.64239540 |
| P20 | 0.7666679 | 2.0253963 | 0.68358450 | 0.54081750 |
| P25 | 0.2950900 | 4.5486294 | 0.50927300 | 0.38403045 |
| P28 | 4.4616492 | 30.8235540 | 17.16905293 | 1.83789930 |
| P33 | 3.3222731 | 33.2508960 | 7.86558092 | 0.43485525 |
| P34 | 0.3459813 | 0.8618043 | 1.84351514 | 0.01439085 |
| P36 | 1.2281449 | 10.5844332 | 0.28646419 | 0.71007735 |
| P41 | 1.6479428 | 25.4247288 | 3.56446783 | 1.35787020 |
| P43 | 3.4204299 | 3.2932512 | 7.78711290 | 13.30940625 |
| P45 | 1.6790221 | 2.9649723 | 5.42286382 | 0.12008145 |
| P47 | 0.4775963 | 2.8718187 | 1.06401988 | 0.80290395 |
| P48 | 3.0192702 | 0.9945204 | 17.82268997 | 6.15753405 |
| P49 | 0.8859881 | 15.7503030 | 0.32349316 | 0.16673670 |
| P51 | 1.8101944 | 8.2580928 | 3.46629537 | 2.64862665 |
| P53 | 1.5688483 | 24.5797020 | 2.14349384 | 0.12960855 |
| P56 | 0.9749002 | 12.3956724 | 0.19719518 | 1.01352285 |
| P58 | 0.7963638 | 6.4119048 | 1.11305236 | 0.07796910 |
| P60 | 2.3930517 | 33.2359848 | 8.57518064 | 0.20491080 |
| P64 | 0.2656280 | 0.3023889 | 1.17634560 | 0.47208750 |
| P65 | 0.4114934 | 0.2113239 | 0.89754587 | 1.90757895 |
| P67 | 1.7026024 | 17.3557032 | 0.62786285 | 2.02223145 |
| P69 | 0.4073834 | 1.8827937 | 1.76632754 | 0.23484960 |
| P72 | 0.2702016 | 3.9134910 | 0.57524887 | 0.20525790 |
| P75 | 1.4392490 | 2.6274414 | 6.26632261 | 1.62440925 |
| P78 | 0.9165986 | 2.0786076 | 1.22668730 | 0.48372150 |
| P79 | 1.7376207 | 12.1993284 | 7.73575004 | 1.52807535 |
# Stacked bar chart of the four sequence fractions per sample.
# Values are divided by 1e9 before plotting.
sequence_fractions %>%
  pivot_longer(!sample, names_to = "fraction", values_to = "value") %>%
  mutate(
    value = value / 1000000000,
    # Fix the factor order so the stacking order matches the legend order
    fraction = factor(
      fraction,
      levels = c("lowqual_bases", "host_bases", "unmapped_bases", "mags_bases")
    )
  ) %>%
  ggplot(aes(x = sample, y = value, fill = fraction)) +
  # geom_col() is the idiomatic form of geom_bar(stat = "identity")
  geom_col(position = "stack") +
  scale_fill_manual(
    name = "Sequence type",
    breaks = c("lowqual_bases", "host_bases", "unmapped_bases", "mags_bases"),
    labels = c("Low quality", "Mapped to host", "Unmapped", "Mapped to MAGs"),
    values = c("#CCCCCC", "#bcdee1", "#d8b8a3", "#93655c")
  ) +
  # NOTE(review): the plotted values are base counts / 1e9, so "GB" presumably
  # means gigabases rather than gigabytes -- confirm the intended unit label.
  labs(x = "Samples", y = "Amount of data (GB)") +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 6),
    legend.position = "bottom"
  )

| species | mean_host_perc | sd_host_perc | max_host_perc | min_host_perc |
|---|---|---|---|---|
| Eb | 24.0105851 | 26.0487087 | 77.523636 | 0.39599794 |
| Ha | 36.4671066 | 15.3892853 | 58.926064 | 6.53771139 |
| Pk | 56.5200310 | 29.2493571 | 92.136483 | 1.81265533 |
| NA | 0.5625856 | 0.4379119 | 1.079399 | 0.02651141 |